From e3a0a15196ac9cce77791cbc7ea407bee1e447d0 Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk" Date: Sun, 21 Dec 2003 14:19:58 +0000 Subject: [PATCH] bitkeeper revision 1.653.1.4 (3fe5ac0esxJ46xgoeERN1TvSw4953g) dev.c, xen_block.c, memory.c: Various fixes for new page-management world. --- xen/common/memory.c | 148 +++++++++++++--------------------- xen/drivers/block/xen_block.c | 34 +++++--- xen/net/dev.c | 29 ++++--- 3 files changed, 95 insertions(+), 116 deletions(-) diff --git a/xen/common/memory.c b/xen/common/memory.c index c2b4ee9f7a..aeddc3ffe2 100644 --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -172,7 +172,6 @@ unsigned int free_pfns; static struct { #define DOP_FLUSH_TLB (1<<0) /* Flush the TLB. */ #define DOP_RELOAD_LDT (1<<1) /* Reload the LDT shadow mapping. */ -#define DOP_RESTORE_CR0 (1<<2) /* Set the WP bit in CR0. */ unsigned long flags; unsigned long cr0; } deferred_op[NR_CPUS] __cacheline_aligned; @@ -316,7 +315,7 @@ static int get_page_from_pagenr(unsigned long page_nr) } if ( unlikely(!get_page(page, current)) && - ((current->domain != 0) || !dom0_get_page(page)) ) + unlikely((current->domain != 0) || !dom0_get_page(page)) ) { MEM_LOG("Could not get page reference for pfn %08lx\n", page_nr); return 0; @@ -372,12 +371,10 @@ static int get_page_from_l1e(l1_pgentry_t l1e) { ASSERT(l1_pgentry_val(l1e) & _PAGE_PRESENT); - if ( unlikely((l1_pgentry_val(l1e) & - (_PAGE_GLOBAL|_PAGE_PAT))) ) + if ( unlikely((l1_pgentry_val(l1e) & (_PAGE_GLOBAL|_PAGE_PAT))) ) { MEM_LOG("Bad L1 page type settings %04lx", - l1_pgentry_val(l1e) & - (_PAGE_GLOBAL|_PAGE_PAT)); + l1_pgentry_val(l1e) & (_PAGE_GLOBAL|_PAGE_PAT)); return 0; } @@ -388,14 +385,10 @@ static int get_page_from_l1e(l1_pgentry_t l1e) return 0; set_bit(_PGC_tlb_flush_on_type_change, &frame_table[l1_pgentry_to_pagenr(l1e)].count_and_flags); - } - else - { - if ( unlikely(!get_page_from_pagenr(l1_pgentry_to_pagenr(l1e))) ) - return 0; + return 1; } - return 1; + return get_page_from_pagenr(l1_pgentry_to_pagenr(l1e)); } @@ -412,9 +405,8 @@ static int get_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn) } if ( unlikely(!get_page_and_type_from_pagenr( - l2_pgentry_to_pagenr(l2e), PGT_l1_page_table)) && - unlikely(!check_linear_pagetable(l2e, pfn)) ) - return 0; + l2_pgentry_to_pagenr(l2e), PGT_l1_page_table)) ) + return check_linear_pagetable(l2e, pfn); return 1; } @@ -422,12 +414,10 @@ static int get_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn) static void put_page_from_l1e(l1_pgentry_t l1e) { - struct pfn_info *page; + struct pfn_info *page = &frame_table[l1_pgentry_to_pagenr(l1e)]; ASSERT(l1_pgentry_val(l1e) & _PAGE_PRESENT); - page = &frame_table[l1_pgentry_to_pagenr(l1e)]; - if ( l1_pgentry_val(l1e) & _PAGE_RW ) { put_page_and_type(page); @@ -613,34 +603,30 @@ static int mod_l2_entry(l2_pgentry_t *pl2e, if ( l2_pgentry_val(nl2e) & _PAGE_PRESENT ) { /* Differ in mapping (bits 12-31) or presence (bit 0)? */ - if ( ((l2_pgentry_val(ol2e) ^ l2_pgentry_val(nl2e)) & ~0xffe) != 0 ) - { - if ( unlikely(!get_page_from_l2e(nl2e, pfn)) ) - return 0; - - if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) ) - { - put_page_from_l2e(nl2e, pfn); - return 0; - } + if ( ((l2_pgentry_val(ol2e) ^ l2_pgentry_val(nl2e)) & ~0xffe) == 0 ) + return update_l2e(pl2e, ol2e, nl2e); - if ( l2_pgentry_val(ol2e) & _PAGE_PRESENT ) - put_page_from_l2e(ol2e, pfn); - } - else if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) ) - { + if ( unlikely(!get_page_from_l2e(nl2e, pfn)) ) return 0; - } - } - else - { + if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) ) + { + put_page_from_l2e(nl2e, pfn); return 0; - + } + if ( l2_pgentry_val(ol2e) & _PAGE_PRESENT ) put_page_from_l2e(ol2e, pfn); + + return 1; } - + + if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) ) + return 0; + + if ( l2_pgentry_val(ol2e) & _PAGE_PRESENT ) + put_page_from_l2e(ol2e, pfn); + return 1; } @@ -652,26 +638,15 @@ static inline int update_l1e(l1_pgentry_t *pl1e, unsigned long o = l1_pgentry_val(ol1e); unsigned long n = l1_pgentry_val(nl1e); - while ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ) + if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) || + unlikely(o != l1_pgentry_val(ol1e)) ) { - unsigned int cpu = smp_processor_id(); - /* The CMPXCHG faulted -- maybe we need to clear the WP bit. */ - if ( deferred_op[cpu].flags & DOP_RESTORE_CR0 ) - { - MEM_LOG("cmpxchg fault despite WP bit cleared\n"); - return 0; - } - deferred_op[cpu].cr0 = read_cr0(); - write_cr0(deferred_op[cpu].cr0 & ~X86_CR0_WP); - deferred_op[cpu].flags |= DOP_RESTORE_CR0; - } - - if ( o != l1_pgentry_val(ol1e)) MEM_LOG("Failed to update %08lx -> %08lx: saw %08lx\n", l1_pgentry_val(ol1e), l1_pgentry_val(nl1e), o); + return 0; + } - /* The swap was successful if the old value we saw is equal to ol1e. */ - return (o == l1_pgentry_val(ol1e)); + return 1; } @@ -691,38 +666,31 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e) if ( l1_pgentry_val(nl1e) & _PAGE_PRESENT ) { - /* - * Differ in mapping (bits 12-31), writeable (bit 1), or - * presence (bit 0)? - */ - if ( ((l1_pgentry_val(ol1e) ^ l1_pgentry_val(nl1e)) & ~0xffc) != 0 ) - { - if ( unlikely(!get_page_from_l1e(nl1e)) ) - return 0; + /* Differ in mapping (bits 12-31), r/w (bit 1), or presence (bit 0)? */ + if ( ((l1_pgentry_val(ol1e) ^ l1_pgentry_val(nl1e)) & ~0xffc) == 0 ) + return update_l1e(pl1e, ol1e, nl1e); - if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) ) - { - put_page_from_l1e(nl1e); - return 0; - } - - if ( l1_pgentry_val(ol1e) & _PAGE_PRESENT ) - put_page_from_l1e(ol1e); - } - else if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) ) - { + if ( unlikely(!get_page_from_l1e(nl1e)) ) return 0; - } - } - else - { + if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) ) + { + put_page_from_l1e(nl1e); return 0; - + } + if ( l1_pgentry_val(ol1e) & _PAGE_PRESENT ) put_page_from_l1e(ol1e); + + return 1; } + if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) ) + return 0; + + if ( l1_pgentry_val(ol1e) & _PAGE_PRESENT ) + put_page_from_l1e(ol1e); + return 1; } @@ -738,12 +706,16 @@ int alloc_page_type(struct pfn_info *page, unsigned int type) * NB. 'p' may no longer be valid by time we dereference it, so * p->processor might be garbage. We clamp it, just in case. */ - if ( !test_bit(_PGC_zombie, &page->count_and_flags) && - unlikely(NEED_FLUSH(tlbflush_time[(p->processor)&(NR_CPUS-1)], - page->tlbflush_timestamp)) ) + if ( likely(!test_bit(_PGC_zombie, &page->count_and_flags)) ) { - perfc_incr(need_flush_tlb_flush); - flush_tlb_cpu(p->processor); + unsigned int cpu = p->processor; + if ( likely(cpu <= smp_num_cpus) && + unlikely(NEED_FLUSH(tlbflush_time[cpu], + page->tlbflush_timestamp)) ) + { + perfc_incr(need_flush_tlb_flush); + flush_tlb_cpu(cpu); + } } } @@ -1053,9 +1025,6 @@ int do_mmu_update(mmu_update_t *ureqs, int count) if ( flags & DOP_RELOAD_LDT ) (void)map_ldt_shadow_page(0); - if ( unlikely(flags & DOP_RESTORE_CR0) ) - write_cr0(deferred_op[cpu].cr0); - return rc; } @@ -1087,9 +1056,6 @@ int do_update_va_mapping(unsigned long page_nr, if ( unlikely(defer_flags & DOP_RELOAD_LDT) ) (void)map_ldt_shadow_page(0); - - if ( unlikely(defer_flags & DOP_RESTORE_CR0) ) - write_cr0(deferred_op[cpu].cr0); - + return err; } diff --git a/xen/drivers/block/xen_block.c b/xen/drivers/block/xen_block.c index 8b1cb119e6..878420a045 100644 --- a/xen/drivers/block/xen_block.c +++ b/xen/drivers/block/xen_block.c @@ -433,7 +433,8 @@ static void dispatch_rw_block_io(struct task_struct *p, int index) phys_seg_t phys_seg[MAX_BLK_SEGS * 2]; /* Check that number of segments is sane. */ - if ( (req->nr_segments == 0) || (req->nr_segments > MAX_BLK_SEGS) ) + if ( unlikely(req->nr_segments == 0) || + unlikely(req->nr_segments > MAX_BLK_SEGS) ) { DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments); goto bad_descriptor; @@ -450,18 +451,12 @@ static void dispatch_rw_block_io(struct task_struct *p, int index) buffer = req->buffer_and_sects[i] & ~0x1FF; nr_sects = req->buffer_and_sects[i] & 0x1FF; - if ( nr_sects == 0 ) + if ( unlikely(nr_sects == 0) ) { DPRINTK("zero-sized data request\n"); goto bad_descriptor; } - if ( !lock_buffer(p, buffer, nr_sects<<9, (operation==READ)) ) - { - DPRINTK("invalid buffer\n"); - goto bad_descriptor; - } - phys_seg[nr_psegs].dev = req->device; phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects; phys_seg[nr_psegs].buffer = buffer; @@ -480,7 +475,6 @@ static void dispatch_rw_block_io(struct task_struct *p, int index) req->sector_number + tot_sects, req->sector_number + tot_sects + nr_sects, req->device); - unlock_buffer(buffer, nr_sects<<9, (operation==READ)); goto bad_descriptor; } @@ -494,7 +488,22 @@ static void dispatch_rw_block_io(struct task_struct *p, int index) } nr_psegs += new_segs; - if ( nr_psegs >= (MAX_BLK_SEGS*2) ) BUG(); + ASSERT(nr_psegs <= MAX_BLK_SEGS*2); + } + + for ( i = 0; i < nr_psegs; i++ ) + { + if ( unlikely(!lock_buffer(p, phys_seg[i].buffer, + phys_seg[i].nr_sects << 9, + operation==READ)) ) + { + DPRINTK("invalid buffer\n"); + while ( i-- > 0 ) + unlock_buffer(phys_seg[i].buffer, + phys_seg[i].nr_sects << 9, + operation==READ); + goto bad_descriptor; + } } atomic_inc(&nr_pending); @@ -512,8 +521,9 @@ static void dispatch_rw_block_io(struct task_struct *p, int index) for ( i = 0; i < nr_psegs; i++ ) { bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL); - if ( bh == NULL ) panic("bh is null\n"); - memset (bh, 0, sizeof (struct buffer_head)); + if ( unlikely(bh == NULL) ) + panic("bh is null\n"); + memset(bh, 0, sizeof (struct buffer_head)); bh->b_size = phys_seg[i].nr_sects << 9; bh->b_dev = phys_seg[i].dev; diff --git a/xen/net/dev.c b/xen/net/dev.c index 91d6a4e0cf..1ec0b5d8cf 100644 --- a/xen/net/dev.c +++ b/xen/net/dev.c @@ -522,6 +522,8 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif) old_page = &frame_table[rx->buf_pfn]; new_page = skb->pf; + skb->pf = old_page; + ptep = map_domain_mem(rx->pte_ptr); new_page->u.domain = p; @@ -541,6 +543,8 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif) ((new_page - frame_table) << PAGE_SHIFT))) != pte ) { unmap_domain_mem(ptep); + /* At some point maybe should have 'new_page' in error response. */ + put_page_and_type(new_page); status = RING_STATUS_BAD_PAGE; goto out; } @@ -550,9 +554,6 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif) unmap_domain_mem(ptep); - /* Our skbuff now points at the guest's old frame. */ - skb->pf = old_page; - /* Updates must happen before releasing the descriptor. */ smp_wmb(); @@ -2078,17 +2079,13 @@ static void get_rx_bufs(net_vif_t *vif) * just once as a writeable page. */ if ( unlikely(buf_page->u.domain != p) || - unlikely(!test_and_clear_bit(_PGC_allocated, - &buf_page->count_and_flags)) || unlikely(cmpxchg(&buf_page->type_and_flags, PGT_writeable_page|PGT_validated|1, 0) != (PGT_writeable_page|PGT_validated|1)) ) { DPRINTK("Bad domain or page mapped writeable more than once.\n"); - if ( buf_page->u.domain == p ) - set_bit(_PGC_allocated, &buf_page->count_and_flags); - if ( unlikely(cmpxchg(ptep, pte & ~_PAGE_PRESENT, pte) != - (pte & ~_PAGE_PRESENT)) ) + if ( cmpxchg(ptep, pte & ~_PAGE_PRESENT, pte) != + (pte & ~_PAGE_PRESENT) ) put_page_and_type(buf_page); make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0); goto rx_unmap_and_continue; @@ -2099,11 +2096,17 @@ static void get_rx_bufs(net_vif_t *vif) * The final count should be 2, because of PGC_allocated. */ if ( unlikely(cmpxchg(&buf_page->count_and_flags, - PGC_tlb_flush_on_type_change | 2, 0) != - (PGC_tlb_flush_on_type_change | 2)) ) + PGC_allocated | PGC_tlb_flush_on_type_change | 2, + 0) != + (PGC_allocated | PGC_tlb_flush_on_type_change | 2)) ) { - DPRINTK("Page held more than once\n"); - /* Leave the page unmapped at 'ptep'. Stoopid domain! */ + DPRINTK("Page held more than once %08lx\n", + buf_page->count_and_flags); + if ( get_page_type(buf_page, PGT_writeable_page) && + (cmpxchg(ptep, pte & ~_PAGE_PRESENT, pte) != + (pte & ~_PAGE_PRESENT)) ) + put_page_and_type(buf_page); + /* NB. If we fail to remap the page, we should probably flag it. */ make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0); goto rx_unmap_and_continue; } -- 2.30.2